---
title: "Clean Woman Data for US Global Health Aid Policy and Family Planning in Sub-Saharan Africa"
output: html_document
editor_options: 
  chunk_output_type: console
---


# Set Up

```{r packages, warning=FALSE, message=FALSE}
# Load the project's shared helper definitions from scripts/functions.R.
source(here::here("scripts/functions.R"))
# Project-defined setup helper, called with tidylog turned on. Presumably it
# attaches the packages used in the chunks below -- TODO confirm in
# scripts/functions.R.
setup_plgha(tidylog = TRUE)
```

# Data Cleaning
This step uses PMA data obtained from IPUMS PMA. Create an IPUMS account and follow the instructions in the data README to create a "family planning person" (i.e., woman-level) extract with the required variables. Then update the paths in the code below to point to the DDI (.xml) and data (.dat.gz) files from your extract.

```{r prep raw pma data, warning=FALSE, message=FALSE}

# Read the IPUMS PMA woman-level extract, keep rows with consentfq == 1,
# drop hhid / consentfq / consenthq / eligible, and rename the extract's
# `year` to `survey_year` so that `year` can hold the interview year
# derived further down.
dat <- read_ipums_micro(
  ddi = here("data-raw/PMA/pma_women.xml"),
  # `data_file` is spelled out in full: the shorter `data =` only worked
  # through R's partial argument matching.
  data_file = here("data-raw/PMA/pma_women.dat.gz")
) %>%
  janitor::clean_names() %>%
  filter(consentfq == 1) %>%
  dplyr::select(-c(hhid, consentfq, consenthq, eligible)) %>%
  rename(survey_year = year)


# Recode special-value labels to NA, coerce the analysis variables to
# numeric, patch sample-specific gaps, and drop sterilized respondents.
# CMC values throughout are computed as 12 * (year - 1900) + month.
dat <- dat %>%
  mutate(
    # replace NIU as missing: any value whose label is one of the strings
    # below becomes NA
    across(
      c(cp, mcp, tcp, fpnowusemrg, durcurpreg, cheb, birthevent,
        fpcurruse, fpuseyr, pregnant, pgdesire, educattgen,
        fpstopwhy, fpystopaccess, fpystopunavail, fpystopcost,
        fpevuse, lastdobcmc, fpbeginusecmc, fpstopusecmc, wealtht,
        wealthq, score, fpstopyr1, fpstopmo1, fpcurreffmeth,
        fpcurreffmethyr),
      ~ lbl_na_if(
        .x,
        ~ .lbl %in% c(
          "Logical edit - missing",
          "Not interviewed (female questionnaire)",
          "Not interviewed (household questionnaire)",
          "Don't know",
          "No response or missing",
          "No response",
          "NIU (not in universe)",
          "NIU (not in universe) or missing"
        )
      )
    ),

    # convert to numeric
    across(
      c(intfqyear, intfqmon, intfqcmc, cp, mcp, tcp,
        fpcurruse, fpnowusemrg, durcurpreg, cheb, birthevent,
        pgdesire, fplchdesire, pregnant, score, fpstopusecmc,
        fpstopyr1, fpstopmo1, urban),
      ~ as.numeric(.)
    ),

    # create interview year & month from cmc (for missing ghana sample).
    # `year` is first derived from the original intfqcmc, then overridden
    # with intfqyear for sample 28806 or wherever the derived value is NA.
    year = 1900 + as.integer((intfqcmc - 1) / 12),
    year = case_when(
      sample == 28806 ~ intfqyear,
      is.na(year) ~ intfqyear,
      TRUE ~ year # year == interview year (survey_year == nominal year)
    ),

    # rebuild intfqcmc from interview year/month for the same cases
    intfqcmc = case_when(
      sample == 28806 ~ 12 * (intfqyear - 1900) + intfqmon,
      is.na(intfqcmc) ~ 12 * (intfqyear - 1900) + intfqmon,
      TRUE ~ intfqcmc
    ),

    # replace missing fpstopcmc for 2019 ethiopia & uganda samples:
    # for these two samples the CMC is always rebuilt from
    # fpstopyr1 / fpstopmo1
    fpstopusecmc = case_when(
      sample %in% c(23108, 80007) ~ 12 * (fpstopyr1 - 1900) + fpstopmo1,
      TRUE ~ fpstopusecmc
    ),

    # replace missing cheb with birthevent if birthevent is not missing
    cheb = case_when(
      is.na(cheb) & !is.na(birthevent) ~ birthevent,
      TRUE ~ cheb
    ),

    # replace incorrect EAID in UG Round 6
    eaid = ifelse(sample == 80006 & eaid == 3896, 3926, eaid)
  ) %>%

  # filter out anyone who has been sterilized: keep rows whose method code
  # is above 102 or missing, for both the current and the past-year method
  filter(fpcurreffmeth > 102 | is.na(fpcurreffmeth)) %>%
  filter(fpcurreffmethyr > 102 | is.na(fpcurreffmethyr))

```


## Create Key Variables
```{r}

# Derive the outcome indicators (contraceptive start/stop, method-type
# flags, recent birth) and the covariates (urban, richer, married, young)
# from the cleaned PMA columns. All indicators are built in one mutate()
# so later entries can use earlier ones.
dat <- dat %>%
  mutate(
    # update fpcurruse to account for currently pregnant: set to 0 when
    # pregnant == 1 or fpevuse == 0, otherwise keep the reported value
    fpcurruse = case_when(
      pregnant == 1 ~ 0,
      fpevuse == 0 ~ 0,
      TRUE ~ fpcurruse
    ),

    # create indicators for starting/stopping fp
    # TRUE when use began at most 12 CMC months before the interview,
    # FALSE when fpevuse == 0. There is no fallback branch, so every other
    # row is NA.
    # NOTE(review): this includes women who began more than 12 months
    # ago -- confirm NA (rather than FALSE) is intended for them.
    fpstart = case_when(
      intfqcmc - fpbeginusecmc <= 12 ~ TRUE,
      fpevuse == 0 ~ FALSE
    ),

    # started within the window AND current or past-year method is one of
    # the codes 111, 112, 121, 122; NA whenever fpstart is NA
    fpstartlarc = case_when(
      fpstart & (
        fpcurreffmeth %in% c(111, 112, 121, 122) |
          fpcurreffmethyr %in% c(111, 112, 121, 122)
      ) ~ TRUE,
      is.na(fpstart) ~ NA,
      TRUE ~ FALSE
    ),

    # with a stop date: stopped at most 12 CMC months before interview;
    # without one: FALSE for current users or fpevuse == 0, NA otherwise
    fpstop = case_when(
      !is.na(fpstopusecmc) ~ intfqcmc - fpstopusecmc <= 12,
      fpcurruse == 1 | fpevuse == 0 ~ FALSE
    ),

    # tag modern methods (current users only): codes below 200 are 1,
    # codes 200-240 are 0, anything else NA
    fpmodern = case_when(
      fpcurruse == 1 & fpcurreffmeth < 200 ~ 1,
      fpcurruse == 1 & fpcurreffmeth %in% 200:240 ~ 0,
      TRUE ~ NA_real_
    ),

    # tag traditional methods: exact complement of fpmodern (0 <-> 1),
    # with NA carried through unchanged
    fptrad = 1 - fpmodern,

    # tag emergency contraceptive use
    # the second branch is only reached when the first is not TRUE, so the
    # `|` inside it does not re-capture emergency users
    fpemerg = case_when(
      fpcurruse == 1 & (fpcurreffmeth == 132 | fpnowusemrg == 1) ~ 1,
      fpcurruse == 1 & (fpcurreffmeth != 132 | fpnowusemrg == 0) ~ 0,
      TRUE ~ NA_real_
    ),

    # NOTE(review): assuming %notin% is the negation of %in%, the
    # %in%-based flags (fplarc, fpinj) code current users with a missing
    # method as 0, whereas the ==-based flags (fppill, fpcon, fpfc, fpimp,
    # fpiud) leave them NA -- confirm this difference is intended.
    fplarc = case_when(
      fpcurruse == 1 & fpcurreffmeth %in% c(111, 112, 121, 122) ~ 1,
      fpcurruse == 1 & fpcurreffmeth %notin% c(111, 112, 121, 122) ~ 0,
      TRUE ~ NA_real_
    ),

    # strictly between 120 and 250 is 1; strictly outside is 0. A code of
    # exactly 120 or 250 matches neither branch and ends up NA.
    # NOTE(review): confirm no such codes exist in the extract.
    fpshort = case_when(
      fpcurruse == 1 & (fpcurreffmeth > 120 & fpcurreffmeth < 250) ~ 1,
      fpcurruse == 1 & (fpcurreffmeth < 120 | fpcurreffmeth > 250) ~ 0,
      TRUE ~ NA_real_
    ),

    # single-method flags among current users (method code in the name
    # comparison; variable labels are attached in the merge step)
    fppill = case_when(
      fpcurruse == 1 & fpcurreffmeth == 131 ~ 1,
      fpcurruse == 1 & fpcurreffmeth != 131 ~ 0,
      TRUE ~ NA_real_
    ),

    fpinj = case_when(
      fpcurruse == 1 & fpcurreffmeth %in% c(121, 122) ~ 1,
      fpcurruse == 1 & fpcurreffmeth %notin% c(121, 122) ~ 0,
      TRUE ~ NA_real_
    ),

    fpcon = case_when(
      fpcurruse == 1 & fpcurreffmeth == 141 ~ 1,
      fpcurruse == 1 & fpcurreffmeth != 141 ~ 0,
      TRUE ~ NA_real_
    ),

    fpfc = case_when(
      fpcurruse == 1 & fpcurreffmeth == 142 ~ 1,
      fpcurruse == 1 & fpcurreffmeth != 142 ~ 0,
      TRUE ~ NA_real_
    ),

    fpimp = case_when(
      fpcurruse == 1 & fpcurreffmeth == 111 ~ 1,
      fpcurruse == 1 & fpcurreffmeth != 111 ~ 0,
      TRUE ~ NA_real_
    ),

    fpiud = case_when(
      fpcurruse == 1 & fpcurreffmeth == 112 ~ 1,
      fpcurruse == 1 & fpcurreffmeth != 112 ~ 0,
      TRUE ~ NA_real_
    ),

    # last birth within 12 CMC months of the interview. Values above 9000
    # become NA; everything else, including a missing lastdobcmc, is 0.
    birth1yr = case_when(
      (lastdobcmc < 9000) & (lastdobcmc >= (intfqcmc - 12)) ~ 1,
      lastdobcmc > 9000 ~ NA_real_,
      TRUE ~ 0
    ),

    # calendar year and month of the last birth, decoded from its CMC
    birthyear = 1900 + as.integer((lastdobcmc - 1) / 12),
    birthmonth = lastdobcmc - 12 * (birthyear - 1900),

    # all missings are from DRC (which only sampled urban EAs), so NA is
    # filled with 1 before the conversion to logical
    urban = as.logical(replace(urban, is.na(urban), 1)),

    # wealth quintile 3, 4 or 5; a missing wealthq yields FALSE
    richer = wealthq %in% c(3, 4, 5),

    # marstat 21/22 -> TRUE, 10/31/32 -> FALSE, any other value -> NA
    married = case_when(
      marstat %in% c(21, 22) ~ TRUE,
      marstat %in% c(10, 31, 32) ~ FALSE,
      TRUE ~ NA
    ),

    # ages 15-19 -> TRUE, 20 and over -> FALSE, under 15 or missing -> NA
    young = case_when(
      age >= 15 & age <= 19 ~ TRUE,
      age >= 20 ~ FALSE
    ),

    # turn the labelled categorical covariates into factors
    wealtht = as_factor(wealtht),
    educattgen = as_factor(educattgen)
  )

```


## Merge 
The final step for the cross-sectional data is to merge in the exposure classifications from the global health aid data.

```{r merge in exposure}
# Country-level exposure classification, joined onto the woman-level data
# by country name.
exposure <- readRDS(here("data-clean/exposure.rds"))

plgha_woman_df <- dat %>%
  mutate(
    # labelled country code -> plain character country name
    country = as.character(droplevels(as_factor(country))),
    # rename the one country to the spelling used as the join key
    country = replace(
      country,
      country %in% "Congo, Democratic Republic",
      "Democratic Republic of the Congo"
    )
  ) %>%
  left_join(exposure, by = "country") %>%
  mutate(
    # Policy "on" indicators: TRUE when the interview CMC is at or after
    # the cutoff CMC, where CMC = 12 * (year - 1900) + month.
    # primary "on" variable calculated as based on may 2017
    plgha_cmc = 12 * (2017 - 1900) + 5,
    plgha_on = intfqcmc >= plgha_cmc,
    # alternative cutoff: January 2017
    plgha_cmc_jan17 = 12 * (2017 - 1900) + 1,
    plgha_on_jan17 = intfqcmc >= plgha_cmc_jan17,
    # alternative cutoff: November 2016
    plgha_cmc_elect = 12 * (2016 - 1900) + 11,
    plgha_on_elect = intfqcmc >= plgha_cmc_elect,
    # alternative cutoff: January 2018
    plgha_cmc_jan18 = 12 * (2018 - 1900) + 1,
    plgha_on_jan18 = intfqcmc >= plgha_cmc_jan18,
    # two-level factor with "Low" as the first level and "High" second;
    # any other value becomes NA
    exposure = factor(exposure, levels = c("Low", "High"))
  ) %>%
  ungroup() %>%
  # constant 1 per row ("Number of Women")
  mutate(n_wmn = 1) %>%
  labelled::set_variable_labels(
    pregnant = "Currently Pregnant",
    fpcurruse = "Current Contraceptive Use",
    fpstart = "Started Using Contraceptives",
    fpstartlarc = "Started Using LARC",
    fpstop = "Discontinued Using Contraceptives",
    fpemerg = "Emergency Contraceptive Use",
    birth1yr = "Gave Birth in the Last Year",
    n_wmn = "Number of Women",
    fplarc = "Current Use of LARC",
    fpshort = "Current Use of Short-Acting Method",
    fpmodern = "Current Use of Modern Method",
    fptrad = "Current Use of Traditional Method",
    young = "=1 if 15-19 years old",
    fppill = "Current Use of Birth Control Pills",
    fpinj = "Current Use of Injectable",
    fpimp = "Current Use of Implant",
    fpiud = "Current Use of IUD",
    fpcon = "Current Use of Male Condoms",
    fpfc = "Current Use of Female Condoms"
  )

# Persist the merged woman-level analysis file.
saveRDS(plgha_woman_df, file = here("data-clean/plgha_woman_df.Rds"))

```

